package com.luchao.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;

/**
 * Scrapes article links from the Sogou WeChat official-account article page
 * (https://weixin.sogou.com/) and prints them to standard output.
 */
public class HtmlParseUtilSogouWx {
    /** Page whose {@code <h3>} headings are scanned for links. */
    private static final String HOME_URL = "https://weixin.sogou.com/";

    /** Timeout, in milliseconds, handed to jsoup when fetching the page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Entry point: fetches the page and prints the links found in its headings.
     *
     * @param args ignored
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws IOException {
        wx();
    }

    /**
     * Prints the link of each "hot article" heading, one per line.
     *
     * <p>For every {@code <h3>} element on the page, the {@code href} of the
     * first anchor inside it is printed exactly as it appears in the page
     * (it is not resolved against the page URL). Headings that contain no
     * anchor with an {@code href} attribute are skipped.
     *
     * @throws IOException if the page cannot be fetched or parsed
     */
    private static void wx() throws IOException {
        Document document = Jsoup.parse(new URL(HOME_URL), TIMEOUT_MILLIS);
        Elements headings = document.getElementsByTag("h3");
        for (Element heading : headings) {
            // Query the DOM rather than slicing the serialized HTML: slicing
            // between href=" and " target=" threw StringIndexOutOfBoundsException
            // whenever a heading had no link, no target attribute, or a
            // different attribute order.
            Element link = heading.select("a[href]").first();
            if (link == null) {
                continue;
            }
            // attr() returns the decoded attribute value, so "&amp;" is
            // already "&" and no manual unescaping is needed.
            System.out.println(link.attr("href"));
        }
    }
}
